/********************************************************************************
Title: Data Preparation for Bangladesh Sample
		Define time points
		Define variables to use in each index
		Define survey round to use as reference round
********************************************************************************/

* Reference round used when standardising items and fitting the factor model.
* The condition is stored as text and expanded later inside -if- qualifiers;
* month is created further down (September 2019 maps to month 1).
local ref "month == 1"

*----------------------------------

*1. Baseline survey: 27 August 2019 - 10 October 2019

*2. Endline survey: 27 August 2020 - 30 September 2020

* Household descriptives: keep age plus the 2019 (_19) measures, strip the
* suffix, and turn the q3_* items into ses* items. Result is stored in the
* tempfile `desc' for the merge further down.
tempfile desc
use "${raw}/bangladesh/bangladesh_desc", clear
keep hh_no age *_19
rename *_19 *
rename q3_* ses*

* Reverse the sign of ses2 and ses3.
* NOTE(review): presumably so that higher values mean higher SES -- confirm.
foreach item in ses2 ses3 {
	replace `item' = -1*`item'
}

keep hh_no age ses*
save `desc'

*-------------------------------------------------------------------------------
*			0. Formatting and Renaming Variables
*-------------------------------------------------------------------------------
* The raw file is wide: suffix _19 marks baseline variables, _20 endline ones.
* Each round is cut out, the suffix is stripped, a single nominal survey date
* is stamped on every row, and the two rounds are stacked (endline rows first).
use "${raw}/bangladesh/bangladesh.dta", clear

* --- Baseline round (saved to a tempfile, original data restored afterwards)
tempfile pre
preserve
	keep hh_no *_19
	rename *_19 *
	generate date = td(20sep2019)
	save `pre'
restore

* --- Endline round
* NOTE(review): BLOCK_19 and POCKET_19 keep their _19 suffix on the endline
* rows, while the baseline rows carry them as BLOCK and POCKET after the
* rename above -- confirm this is intended.
keep hh_no BLOCK_19 POCKET_19 *_20
rename *_20 *
generate date = td(20sep2020)

append using `pre'


*-------------------------------------------------------------------------------
*			0.1	Heterogeneity Variables
*-------------------------------------------------------------------------------
* Attach the household SES items saved in `desc' (matched on hh_no) to the
* stacked panel. _merge is left in the data.
merge m:1 hh_no using `desc'

* Build the list of PCA inputs in the global sesvars:
*   - ses1, ses2 and ses3 enter as they are;
*   - every other ses item enters through its category dummies.
* -tab, gen()- is still run for every item, so the frequency tables are shown
* and the dummies <item>_1, <item>_2, ... are created for all of them.
gl sesvars
foreach var of varlist ses* {
	tab `var', gen(`var'_)
	* Number of categories, i.e. number of dummies just created
	loc ncat = r(r)

	* Exact name match: a pattern match on "ses1" would also catch ses10-ses19.
	if inlist("`var'", "ses1", "ses2", "ses3") {
		gl sesvars $sesvars `var'
	}
	else {
		* List the dummies one by one; a `var'_? wildcard would silently
		* skip dummies with a two-digit index (10 or more categories).
		forvalues k = 1/`ncat' {
			gl sesvars $sesvars `var'_`k'
		}
	}
}

* SES index: score on the first principal component
pca $sesvars, factor(1)
predict ses
/*
foreach var of varlist ses? {
	if !regexm("`var'","(ses1|ses2|ses3)") reg ses i.`var'
	if regexm("`var'","(ses1|ses2|ses3)") reg ses `var'
}
*/

* Split the SES index into two equal-frequency groups (0 = lower, 1 = upper)
egen highses = cut(ses), group(2)

*			0.1		PID and HHID
*-------------------------------------------------------------------------------
* pid is a numeric id encoded from the string hh_no; the original id is kept
* under the name hhid.
encode hh_no, generate(pid)
rename hh_no hhid

* Drop rows lacking an id or a survey date, then check that pid and date
* together uniquely identify the remaining observations.
drop if mi(pid) | mi(date)
isid pid date

*			0.2		Month
*-------------------------------------------------------------------------------
* Months elapsed since August 2019 (September 2019 = 1, September 2020 = 13)
generate month = 12*(year(date) - 2019) + (month(date) - 8)
tabulate month


*-------------------------------------------------------------------------------
*			1. Generating Variable Lists
*-------------------------------------------------------------------------------
* Items entering the depression indices, held in the global depression.
* Any previous definition is dropped first; the list is then built in two
* steps (selected qm32 items, then the range qm33_2-qm33_19).
macro drop depression
global depression qm32_1 qm32_4 qm32_7 qm32_8 qm32_9 qm32_10
global depression $depression qm33_2-qm33_19

* Show the final list in the log
display "$depression"


*-------------------------------------------------------------------------------
*			2. Generating Indices
*-------------------------------------------------------------------------------

*-------------------------------------------------------------------------------
*          CESD 20 item score
*-------------------------------------------------------------------------------
* Raw CES-D total: row sum of all qm33_* items, computed only where qm33_2 is
* non-missing. rowtotal() treats missing items as 0 (it is the documented name
* of the older rsum()). The sum is taken before the items are sign-flipped and
* standardised further down, so a higher score shows worse MH.
egen cesd_score = rowtotal(qm33_*) if qm33_2<.
* The label is closed with a straight double quote; a typographic closing
* quote would leave the string unterminated.
label variable cesd_score "Depression score"

* Checking Cronbach's alpha
*Validated tool CESD-20
alpha qm33_* // Scale reliability coefficient= 0.948
*Weighted depression score
alpha qm32_1 qm32_4 qm32_7 qm32_8 qm32_9 qm32_10 qm33_2-qm33_19 // Scale reliability coefficient= 0.936

* Unweighted index: the raw CES-D total.
* NOTE(review): this index keeps the "higher = worse" direction, whereas the
* items feeding the other indices are multiplied by -1 below -- confirm the
* difference in sign is intended.
gen ___depression_nw = cesd_score


* Standardise every depression item in place:
*   1. reverse its sign;
*   2. centre and scale it by the mean and SD of the reference round (`ref');
*   3. keep a copy of the standardised item as swt_<item>.
foreach item of varlist $depression {
	replace `item' = -`item'
	summarize `item' if `ref'
	replace `item' = (`item' - `r(mean)')/`r(sd)'
	generate swt_`item' = `item'
}


* Factor-weighted index: a one-factor model (iterated principal factors) is
* fitted on the reference round only; predict is then run without an -if-,
* so scores are produced for observations outside the reference round too.
factor $depression if `ref', ipf factors(1)
predict ___depression_fw

*-------------------------------------------------------------------------------
*          Unweighted index
*-------------------------------------------------------------------------------


*-------------------------------------------------------------------------------
*          Inverse-covariance-weighted index
*-------------------------------------------------------------------------------
* icw_index is not defined in this file.
* NOTE(review): presumably it writes the inverse-covariance-weighted index of
* the listed items into the variable named in gen() -- confirm against its
* definition.
icw_index($depression), gen(___depression_icw)

* Overview of all depression indices created above
describe ___depression*

* Association between the unweighted index and the high-SES indicator
regress ___depression_nw highses

